<!DOCTYPE html>












  


<html class="theme-next mist use-motion" lang="zh-CN">
<head><meta name="generator" content="Hexo 3.8.0">
  <meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">


  <script>
  (function(i,s,o,g,r,a,m){i["DaoVoiceObject"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;a.charset="utf-8";m.parentNode.insertBefore(a,m)})(window,document,"script",('https:' == document.location.protocol ? 'https:' : 'http:') + "//widget.daovoice.io/widget/0f81ff2f.js","daovoice")
  daovoice('init', {
      app_id: "15fc8e87"
    });
  daovoice('update');
  </script>






  
  
    
    
  <script src="/lib/pace/pace.min.js?v=1.0.2"></script>
  <link href="/lib/pace/pace-theme-minimal.min.css?v=1.0.2" rel="stylesheet">







<meta http-equiv="Cache-Control" content="no-transform">
<meta http-equiv="Cache-Control" content="no-siteapp">






















<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css">

<link href="/css/main.css?v=6.3.0" rel="stylesheet" type="text/css">


  <link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=6.3.0">


  <link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=6.3.0">


  <link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=6.3.0">


  <link rel="mask-icon" href="/images/logo.svg?v=6.3.0" color="#222">









<script type="text/javascript" id="hexo.configurations">
  var NexT = window.NexT || {};
  var CONFIG = {
    root: '/',
    scheme: 'Mist',
    version: '6.3.0',
    sidebar: {"position":"left","display":"post","offset":12,"b2t":true,"scrollpercent":true,"onmobile":false},
    fancybox: false,
    fastclick: false,
    lazyload: false,
    tabs: true,
    motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {"per_page":10},
      labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
    }
  };
</script>


  




  <meta name="description" content="七十年代时，有一长辈连练铁砂掌，功夫成了之后，可以掌断五砖，凌空碎砖，威风得不得了。时至八十年代，只能掌断三砖。到九十年代只能一砖一砖的断了。他说，一直以为功力退步了，后来才知道烧砖的配方改了。">
<meta name="keywords" content="Spark,Parquet,大数据">
<meta property="og:type" content="article">
<meta property="og:title" content="Spark与Apache-Parquet">
<meta property="og:url" content="http://blog.wangxc.club/2017/04/24/Spark与Apache-Parquet/index.html">
<meta property="og:site_name" content="冬与晨">
<meta property="og:description" content="七十年代时，有一长辈连练铁砂掌，功夫成了之后，可以掌断五砖，凌空碎砖，威风得不得了。时至八十年代，只能掌断三砖。到九十年代只能一砖一砖的断了。他说，一直以为功力退步了，后来才知道烧砖的配方改了。">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-0ca5fe91bd99929e.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-7550885467f50a4c.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:image" content="http://upload-images.jianshu.io/upload_images/3167229-d37fea96d79e5e91.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">
<meta property="og:updated_time" content="2018-06-29T13:58:50.290Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Spark与Apache-Parquet">
<meta name="twitter:description" content="七十年代时，有一长辈连练铁砂掌，功夫成了之后，可以掌断五砖，凌空碎砖，威风得不得了。时至八十年代，只能掌断三砖。到九十年代只能一砖一砖的断了。他说，一直以为功力退步了，后来才知道烧砖的配方改了。">
<meta name="twitter:image" content="http://upload-images.jianshu.io/upload_images/3167229-0ca5fe91bd99929e.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240">






  <link rel="canonical" href="http://blog.wangxc.club/2017/04/24/Spark与Apache-Parquet/">



<script type="text/javascript" id="page.configurations">
  CONFIG.page = {
    sidebar: "",
  };
</script>

  <title>Spark与Apache-Parquet | 冬与晨</title>
  




<script async src="https://www.googletagmanager.com/gtag/js?id=UA-102769182-1"></script>
<script>
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('js', new Date());

  gtag('config', 'UA-102769182-1');
</script>



  <script type="text/javascript">
    var _hmt = _hmt || [];
    (function() {
      var hm = document.createElement("script");
      hm.src = "https://hm.baidu.com/hm.js?809d5610fca8eaff344d9a698536110d";
      var s = document.getElementsByTagName("script")[0];
      s.parentNode.insertBefore(hm, s);
    })();
  </script>




  <noscript>
  <style type="text/css">
    .use-motion .motion-element,
    .use-motion .brand,
    .use-motion .menu-item,
    .sidebar-inner,
    .use-motion .post-block,
    .use-motion .pagination,
    .use-motion .comments,
    .use-motion .post-header,
    .use-motion .post-body,
    .use-motion .collection-title { opacity: initial; }

    .use-motion .logo,
    .use-motion .site-title,
    .use-motion .site-subtitle {
      opacity: initial;
      top: initial;
    }

    .use-motion {
      .logo-line-before i { left: initial; }
      .logo-line-after i { right: initial; }
    }
  </style>
</noscript>

</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail">
    <div class="headband">
    </div>
<a href="https://github.com/vector4wang"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub"></a>
    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/" class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">冬与晨</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
    
  </div>

  <div class="site-nav-toggle">
    <button aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>



<nav class="site-nav">
  
    <ul id="menu" class="menu">
      
        
        
        
          
          <li class="menu-item menu-item-home">
    <a href="/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-home"></i> <br>首页</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-about">
    <a href="/about/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-user"></i> <br>关于</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-tags">
    <a href="/tags/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-tags"></i> <br>标签</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-categories">
    <a href="/categories/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-th"></i> <br>分类</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-archives">
    <a href="/archives/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-archive"></i> <br>归档</a>
  </li>
        
        
        
          
          <li class="menu-item menu-item-commonweal">
    <a href="/404/" rel="section">
      <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br>公益 404</a>
  </li>

      
      
    </ul>
  

  
    

  

  
</nav>



  



</div>
    </header>

    


    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://blog.wangxc.club/2017/04/24/Spark与Apache-Parquet/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="wangxc">
      <meta itemprop="description" content="革命尚未成功，同志还需努力~">
      <meta itemprop="image" content="/images/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="冬与晨">
    </span>

    
      <header class="post-header">

        
        
          <h2 class="post-title" itemprop="name headline">Spark与Apache-Parquet
              
            
          </h2>
        

        <div class="post-meta">
          <span class="post-time">

            
            
            

            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              

              
                
              

              <time title="创建时间：2017-04-24 22:31:16" itemprop="dateCreated datePublished" datetime="2017-04-24T22:31:16+08:00">2017-04-24</time>
            

            
              

              
                
                <span class="post-meta-divider">|</span>
                

                <span class="post-meta-item-icon">
                  <i class="fa fa-calendar-check-o"></i>
                </span>
                
                  <span class="post-meta-item-text">更新于</span>
                
                <time title="修改时间：2018-06-29 21:58:50" itemprop="dateModified" datetime="2018-06-29T21:58:50+08:00">2018-06-29</time>
              
            
          </span>

          
            <span class="post-category">
            
              <span class="post-meta-divider">|</span>
            
              <span class="post-meta-item-icon">
                <i class="fa fa-folder-o"></i>
              </span>
              
                <span class="post-meta-item-text">分类于</span>
              
              
                <span itemprop="about" itemscope itemtype="http://schema.org/Thing"><a href="/categories/技术/" itemprop="url" rel="index"><span itemprop="name">技术</span></a></span>

                
                
              
            </span>
          

          
            
              <span class="post-comments-count">
                <span class="post-meta-divider">|</span>
                <span class="post-meta-item-icon">
                  <i class="fa fa-comment-o"></i>
                </span>
                <a href="/2017/04/24/Spark与Apache-Parquet/#comments" itemprop="discussionUrl">
                  <span class="post-meta-item-text">评论数：</span> <span class="post-comments-count valine-comment-count" data-xid="/2017/04/24/Spark与Apache-Parquet/" itemprop="commentCount"></span>
                </a>
              </span>
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <blockquote>
<p>七十年代时，有一长辈连练铁砂掌，功夫成了之后，可以掌断五砖，凌空碎砖，威风得不得了。时至八十年代，只能掌断三砖。到九十年代只能一砖一砖的断了。他说，一直以为功力退步了，后来才知道烧砖的配方改了。</p>
</blockquote>
<a id="more"></a>


<p><img src="http://upload-images.jianshu.io/upload_images/3167229-0ca5fe91bd99929e.jpg?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="数据压缩"></p>
<h3 id="前言"><a href="#前言" class="headerlink" title="前言"></a>前言</h3><p>　　前两篇将了spark的部署和一些简单的实例<a href="http://www.jianshu.com/p/52d0f043cc04" target="_blank" rel="noopener">Spark初体验(步骤超详细)</a>和<a href="http://www.jianshu.com/p/69d4547688c9" target="_blank" rel="noopener">Spark再体验之springboot整合spark</a>。我相信前两篇会对刚入门的sparker来说会有一些启发。<br>　　今天在使用spark去加载200万条数据的时候，服务器提示<strong>内存分配不足</strong>(服务器配置较低)，这里我就在想有没有什么方法将数据压缩压缩再压缩呢？网上查资料，问他人，最后看到并使用了<code>Apache Parquet</code><a href="http://parquet.apache.org/" target="_blank" rel="noopener">官网</a>，这里简单的介绍一下parquet</p>
<h3 id="Parquet"><a href="#Parquet" class="headerlink" title="Parquet"></a>Parquet</h3><blockquote>
<p>　　Parquet是面向分析型业务的<strong>列式存储</strong>格式,由Twitter和Cloudera合作开发，2015年5月从Apache的孵化器里毕业成为Apache顶级项目.<br>　　当时Twitter的日增数据量达到压缩之后的100TB+，存储在HDFS上，工程师会使用多种计算框架（例如MapReduce, Hive, Pig等）对这些数据做分析和挖掘；日志结构是复杂的嵌套数据类型，例如一个典型的日志的schema有87列，嵌套了7层。所以需要设计一种列式存储格式，既能支持关系型数据（简单数据类型），又能支持复杂的嵌套类型的数据，同时能够适配多种数据处理框架。</p>
</blockquote>
<p>通过阅读<a href="https://www.ibm.com/developerworks/cn/analytics/blog/5-reasons-to-choose-parquet-for-spark-sql/index.html" target="_blank" rel="noopener">这篇文章</a>，相信你会对parquet的优点有所了解。<br>网上关于它的介绍和算法的讲解一大推，简单的讲使用parquet存储数据有以下优点：</p>
<ul>
<li>压缩数据</li>
<li>不失真</li>
<li>减少IO吞吐量</li>
<li>高效的查询</li>
</ul>
<p>更重要的是spark和parquet简直是绝配，犹如小葱拌豆腐、京酱肉丝陪面皮、手擀面配大蒜。。。</p>
<h3 id="示例"><a href="#示例" class="headerlink" title="示例"></a>示例</h3><p>老规矩，先上pom</p>
<figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br></pre></td><td class="code"><pre><span class="line">...</span><br><span class="line"><span class="tag">&lt;<span class="name">parquet.version</span>&gt;</span>1.7.0<span class="tag">&lt;/<span class="name">parquet.version</span>&gt;</span></span><br><span class="line">...</span><br><span class="line"><span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.apache.parquet<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>parquet-common<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">version</span>&gt;</span>$&#123;parquet.version&#125;<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.apache.parquet<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>parquet-encoding<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">version</span>&gt;</span>$&#123;parquet.version&#125;<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.apache.parquet<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>parquet-column<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">version</span>&gt;</span>$&#123;parquet.version&#125;<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">dependency</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">groupId</span>&gt;</span>org.apache.parquet<span class="tag">&lt;/<span class="name">groupId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">artifactId</span>&gt;</span>parquet-hadoop<span class="tag">&lt;/<span class="name">artifactId</span>&gt;</span></span><br><span class="line">            <span class="tag">&lt;<span class="name">version</span>&gt;</span>$&#123;parquet.version&#125;<span class="tag">&lt;/<span class="name">version</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;/<span class="name">dependency</span>&gt;</span></span><br></pre></td></tr></table></figure>

<p>下面使用spark对mysql的一张表做parquet的读写操作<br>将整张表的数据存储为parquet格式的文件</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">static</span> String url = <span class="string">"jdbc:mysql://localhost:3306/world?"</span> +</span><br><span class="line">            <span class="string">"useUnicode=true&amp;characterEncoding=UTF-8"</span> +</span><br><span class="line">            <span class="string">"&amp;zeroDateTimeBehavior=convertToNull"</span>;</span><br><span class="line"><span class="keyword">static</span> String table = <span class="string">"jd_trainingdata"</span>;</span><br><span class="line"><span class="keyword">static</span> String username = <span class="string">"root"</span>;</span><br><span class="line"><span class="keyword">static</span> String passwd = <span class="string">"root"</span>;</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">parquetWrite</span><span class="params">()</span> </span>&#123;</span><br><span class="line">        SparkConf conf = <span class="keyword">new</span> SparkConf()</span><br><span class="line">                .setAppName(<span class="string">"test"</span>)</span><br><span class="line">                .setMaster(<span class="string">"local"</span>)</span><br><span class="line">                .set(<span class="string">"spark.executor.memory"</span>, <span class="string">"8g"</span>)</span><br><span class="line">                .set(<span class="string">"spark.rdd.compress true"</span>,<span class="string">"true"</span>)</span><br><span class="line">                .set(<span class="string">"spark.testing.memory"</span>, <span class="string">"2147480000"</span>);</span><br><span class="line">        JavaSparkContext sc = <span class="keyword">new</span> JavaSparkContext(conf);</span><br><span class="line">        SQLContext sqlContext = <span class="keyword">new</span> SQLContext(sc);</span><br><span class="line">        sqlContext.setConf(<span class="string">"spark.sql.dialect"</span>, <span class="string">"sql"</span>);</span><br><span class="line">        Properties connectionProperties = <span class="keyword">new</span> Properties();</span><br><span class="line">        connectionProperties.put(<span class="string">"driver"</span>,<span class="string">"com.mysql.jdbc.Driver"</span>);</span><br><span class="line">        connectionProperties.put(<span class="string">"user"</span>, username);</span><br><span class="line">        connectionProperties.put(<span class="string">"password"</span>, passwd);</span><br><span class="line">        DataFrame vectorTable = sqlContext.read().jdbc(url, table,connectionProperties);</span><br><span class="line">        vectorTable.saveAsParquetFile(<span class="string">"jdData"</span>);<span class="comment">// 在项目里生成文件，当然你也可以写绝对路径</span></span><br><span class="line">    &#125;</span><br></pre></td></tr></table></figure>

<p>结果为<br><img src="http://upload-images.jianshu.io/upload_images/3167229-7550885467f50a4c.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="parquet包含的文件"><br>关于内容我没有去深究（官网有详细的解释说明），但是能看得出文件是成功的生成了~
那么接下来看看如何去读parquet的文件内容</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">parquetRead</span><span class="params">()</span> </span>&#123;</span><br><span class="line">        SparkConf conf = <span class="keyword">new</span> SparkConf()</span><br><span class="line">                .setAppName(<span class="string">"test"</span>)</span><br><span class="line">                .setMaster(<span class="string">"local[4]"</span>)</span><br><span class="line">                .set(<span class="string">"spark.executor.memory"</span>, <span class="string">"8g"</span>)</span><br><span class="line">                .set(<span class="string">"spark.rdd.compress true"</span>,<span class="string">"true"</span>)</span><br><span class="line">                .set(<span class="string">"spark.testing.memory"</span>, <span class="string">"2147480000"</span>);</span><br><span class="line">        JavaSparkContext sc = <span class="keyword">new</span> JavaSparkContext(conf);</span><br><span class="line">        SQLContext sqlContext = <span class="keyword">new</span> SQLContext(sc);</span><br><span class="line">        sqlContext.parquetFile(<span class="string">"jdData"</span>).registerTempTable(<span class="string">"jdData"</span>);</span><br><span class="line">        DataFrame dbaClos = sqlContext.sql(<span class="string">"select * from jdData where Title = 'DBA' and Title &lt;&gt; '' "</span>);</span><br><span class="line">        dbaClos.show();</span><br><span class="line">    &#125;</span><br></pre></td></tr></table></figure>

<p>运行日志<br><img src="http://upload-images.jianshu.io/upload_images/3167229-d37fea96d79e5e91.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240" alt="日志"><br>当你拿到<code>sqlContext</code>的时候，你就可以使用spark提供的sql去查找你想要的数据，要提醒一下， spark支持的sql跟mysql，oracle的不太一样，详细参考<a href="http://spark.apache.org/docs/latest/sql-programming-guide.html#supported-hive-features" target="_blank" rel="noopener">官网帮助文档</a></p>
<h3 id="后续"><a href="#后续" class="headerlink" title="后续"></a>后续</h3><p>　　这里说一下我为什么使用parquet，我现在测试服务器的内存是16g，项目在启动的时候需要加载其他的模型、数据等文件，而且数据虽然才有220多万条，但是存储的内容比较多，所以就会导致内存不足，使用了parquet之后，数据大小直接被压缩为原来的三分之一！！！而且spark对parquet文件的支持近乎完美，所以使用parquet之后，我完全可以不用考虑内存分配不足的问题了。</p>

      
    </div>

    

    
    
    

    <div>
          
            
          
    </div>

    

    
      <div>
        <div style="padding: 10px 0; margin: 20px auto; width: 90%; text-align: center;">
  <div></div>
  <button id="rewardButton" disable="enable" onclick="var qr = document.getElementById('QR'); if (qr.style.display === 'none') {qr.style.display='block';} else {qr.style.display='none'}">
    <span>打赏</span>
  </button>
  <div id="QR" style="display: none;">

    
      <div id="wechat" style="display: inline-block">
        <img id="wechat_qr" src="https://i.loli.net/2017/12/29/5a4632028eaeb.jpg" alt="wangxc 微信支付">
        <p>微信支付</p>
      </div>
    

    
      <div id="alipay" style="display: inline-block">
        <img id="alipay_qr" src="https://i.loli.net/2017/12/29/5a46320234b1d.jpg" alt="wangxc 支付宝">
        <p>支付宝</p>
      </div>
    

    

  </div>
</div>

      </div>
    

    
<div>
  
    <div>
    
        <div style="text-align:center;color: #ccc;font-size:14px;">
              -------------本文结束<i class="fa fa-paw"></i>感谢您的阅读-------------
        </div>
    
</div>

  
</div>
    <footer class="post-footer">
      
        <div class="post-tags">
          
            <a href="/tags/Spark/" rel="tag"><i class="fa fa-tag"></i> Spark</a>
          
            <a href="/tags/Parquet/" rel="tag"><i class="fa fa-tag"></i> Parquet</a>
          
            <a href="/tags/大数据/" rel="tag"><i class="fa fa-tag"></i> 大数据</a>
          
        </div>
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/04/24/Maven系列（六）配合GitLab持续集成（CI）/" rel="next" title="Maven系列（六）配合GitLab持续集成（CI）">
                <i class="fa fa-chevron-left"></i> Maven系列（六）配合GitLab持续集成（CI）
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/04/24/Javaer，你必须要了解的ExecutorService/" rel="prev" title="Javaer，你必须要了解的ExecutorService">
                Javaer，你必须要了解的ExecutorService <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          

  
    <div class="comments" id="comments">
    </div>
  



        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview-wrap">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview-wrap sidebar-panel">
        <div class="site-overview">
          <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
            
              <img class="site-author-image" itemprop="image" src="/images/avatar.jpg" alt="wangxc">
            
              <p class="site-author-name" itemprop="name">wangxc</p>
              <p class="site-description motion-element" itemprop="description">革命尚未成功，同志还需努力~</p>
          </div>

          
            <nav class="site-state motion-element">
              
                <div class="site-state-item site-state-posts">
                
                  <a href="/archives/">
                
                    <span class="site-state-item-count">83</span>
                    <span class="site-state-item-name">日志</span>
                  </a>
                </div>
              

              
                
                
                <div class="site-state-item site-state-categories">
                  <a href="/categories/index.html">
                    
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                    <span class="site-state-item-count">6</span>
                    <span class="site-state-item-name">分类</span>
                  </a>
                </div>
              

              
                
                
                <div class="site-state-item site-state-tags">
                  <a href="/tags/index.html">
                    
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                      
                    
                    <span class="site-state-item-count">124</span>
                    <span class="site-state-item-name">标签</span>
                  </a>
                </div>
              
            </nav>
          

          

          
            <div class="links-of-author motion-element">
              
                <span class="links-of-author-item">
                  <a href="https://github.com/vector4wang" target="_blank" title="GitHub" rel="external nofollow"><i class="fa fa-fw fa-github"></i>GitHub</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="mailto:vector4wang@qq.com" target="_blank" title="E-Mail" rel="external nofollow"><i class="fa fa-fw fa-envelope"></i>E-Mail</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="https://www.jianshu.com/u/223a1314e818" target="_blank" title="简书" rel="external nofollow"><i class="fa fa-fw fa-book"></i>简书</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="http://blog.csdn.net/qqhjqs?viewmode=contents" target="_blank" title="CSDN" rel="external nofollow"><i class="fa fa-fw fa-crosshairs"></i>CSDN</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="https://twitter.com/BMHJQS" target="_blank" title="Twitter" rel="external nofollow"><i class="fa fa-fw fa-twitter"></i>Twitter</a>
                  
                </span>
              
                <span class="links-of-author-item">
                  <a href="https://www.facebook.com/" target="_blank" title="FB Page" rel="external nofollow"><i class="fa fa-fw fa-facebook-official"></i>FB Page</a>
                  
                </span>
              
            </div>
          

          
          

          
          

          
            
          
          

        </div>
      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#前言"><span class="nav-number">1.</span> <span class="nav-text">前言</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Parquet"><span class="nav-number">2.</span> <span class="nav-text">Parquet</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#示例"><span class="nav-number">3.</span> <span class="nav-text">示例</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#后续"><span class="nav-number">4.</span> <span class="nav-text">后续</span></a></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      
        <div class="back-to-top">
          <i class="fa fa-arrow-up"></i>
          
            <span id="scrollpercent"><span>0</span>%</span>
          
        </div>
      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <script async src="https://dn-lbstatics.qbox.me/busuanzi/2.3/busuanzi.pure.mini.js"></script>
<div class="copyright">&copy; <span itemprop="copyrightYear">2020</span>
  <span class="with-love" id="animate">
    <i class="fa fa-user"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">wangxc</span>

  

  
</div>


  


<div class="powered-by">
<i class="fa fa-user-md"></i><span id="busuanzi_container_site_pv">
  本站访客数:<span id="busuanzi_value_site_uv"></span>
</span>
</div>


  <div class="powered-by">由 <a class="theme-link" target="_blank" rel="external nofollow" href="https://hexo.io">Hexo</a> 强力驱动 v3.8.0</div>



  <span class="post-meta-divider">|</span>



  <div class="theme-info">主题 &mdash; <a class="theme-link" target="_blank" rel="external nofollow" href="https://theme-next.org">NexT.Mist</a> v6.3.0</div>




        








        
      </div>
    </footer>

    

    

  </div>

  

<script type="text/javascript">
  if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
    window.Promise = null;
  }
</script>


























  
  
    <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
  

  
  
    <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
  


  


  <script type="text/javascript" src="/js/src/utils.js?v=6.3.0"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=6.3.0"></script>



  
  

  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=6.3.0"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=6.3.0"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=6.3.0"></script>



  



	





  





  








  <script src="//cdn1.lncld.net/static/js/3.0.4/av-min.js"></script>
  
  
  <script src="//unpkg.com/valine/dist/Valine.min.js"></script>
  
  <script type="text/javascript">
    var GUEST = ['nick','mail','link'];
    var guest = 'nick,mail,link';
    guest = guest.split(',').filter(function (item) {
      return GUEST.indexOf(item)>-1;
    });
    new Valine({
        el: '#comments' ,
        verify: false,
        notify: false,
        appId: 'YdGFHBfuDwObik8qwCeRAWgr-gzGzoHsz',
        appKey: 'oFpbJwmMQf98DfFDUlzE5Fai',
        placeholder: 'Just go go',
        avatar:'mm',
        guest_info:guest,
        pageSize:'10' || 10,
    });
  </script>



  





  

  

  

  

  
  

  

  

  

  

  

</body>
</html>
